""" Get the bootstrapped moments which can then be used to get the
confidence intervals.
"""
import stata_setup
stata_setup.config("/Applications/Stata/", "se")

from pystata import stata

import pandas as pd
import numpy as np
import subprocess
import os
import time
import json
import sys
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

sys.path.append('./')

from src.data_py import utils
from src.run_scripts import utils as utils_run_scripts

#%% CONFIG ------------------------------------------------------------------------------
# Number of bootstrap replications: taken from the first command-line
# argument when it parses as an integer, otherwise defaults to 200.
try:
    n_replications = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument supplied; ValueError: argument is not an integer.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    n_replications = 200

# Status labels passed to utils.construct_states as `utilization_status`.
utilization_status = [
    "Drilling",
    "Workover",
    "Production",
    "Inspection",
    "Waiting on Loc",
    "Enroute",
    "Accommodation",
    # "Modification"
]

# Status labels passed to utils.construct_states as `nonutilization_status`.
nonutilization_status = [
    "Cold Stacked",
    "Ready Stacked",
    # "Modification"
]

# Fix the global NumPy seed so every np.random.choice draw below is reproducible.
np.random.seed(1000)

#%% IMPORT THE DATA ---------------------------------------------------------------------
# IHS contracts and the monthly state panel.
df_ihs_contracts = pd.read_csv(
    './data_py/processed/contracts_final.csv',
    index_col=[0],
)
df_state = pd.read_csv(
    './data_py/processed/states_month.csv',
    index_col=[0],
    parse_dates=['date', 'month'],
)
# Boolean flag: True for rows whose `date` falls in calendar year 2006.
df_state['2006'] = df_state['date'].dt.year.isin([2006])

# Rigzone status and contract tables that get resampled below.
df_rigzone_status = pd.read_csv(
    './data_py/processed/rigzone_status_for_bootstrap.csv',
    index_col=[0],
    parse_dates=['stat start'],
)
#df_rigzone_status = df_rigzone_status[
 #   (df_rigzone_status['stat start'].dt.year <= 2009)
#]

df_rigzone_contracts = pd.read_csv(
    './data_py/processed/rigzone_contracts_for_bootstrap.csv',
    index_col=[0],
    parse_dates=['contract_start_month', 'contract_start_fortnight'],
)

# Deflated gas prices, keyed by time resolution.
df_gas_by_time = {
    'month': pd.read_csv(
        './data_py/processed/gas_prices_deflated.csv',
        index_col=[0],
        parse_dates=['date', 'month'],
    ),
    'fortnight': pd.read_csv(
        './data_py/processed/gas_prices_deflated_fortnight.csv',
        index_col=[0],
        parse_dates=['date'],
    ),
}

# `delta` is the top-left cell of the stored table.
delta_table = pd.read_csv('./models/smm_input/delta.csv', index_col=[0])
delta = delta_table.values[0][0]

# Price-match arrays, indexed as price_match_values_by_spec[spec][n].
for t in ['month']:
    price_match_values_by_spec = dict()
    for spec in ['low', 'mid', 'high']:
        values_by_n = dict()
        for n in [0, 1, 2, 3]:
            price_match_path = (
                f"./models/price_match/price_match_values_{spec}_{n}_{t}.json")
            with open(price_match_path) as f:
                values_by_n[n] = np.array(json.load(f))
        price_match_values_by_spec[spec] = values_by_n

#%% GET THE STATE DATA ------------------------------------------------------------------
# Build a per-rig identifier `rig_name_extra` = "<rig name>_<max wd>" in both
# Rigzone tables, and collect the unique identifiers per `max wd` value so
# that rigs can later be resampled within each group.
rigs_ihs_id_boom_spec = dict()
rigs_ihs_id_boom_spec_contracts = dict()
df_rigzone_status['rig_name_extra'] = 'nan'
for spec in df_rigzone_status['max wd'].unique():
    suffix = f'_{spec}'
    in_spec_status = df_rigzone_status['max wd'] == spec
    in_spec_contracts = df_rigzone_contracts['max wd'] == spec

    # Tag the rows of this group in each table.
    df_rigzone_status.loc[in_spec_status, 'rig_name_extra'] = (
        df_rigzone_status.loc[in_spec_status, 'rig name'] + suffix
    )
    df_rigzone_contracts.loc[in_spec_contracts, 'rig_name_extra'] = (
        df_rigzone_contracts.loc[in_spec_contracts, 'rig name'] + suffix
    )

    # Unique identifiers available for resampling within this group.
    rigs_ihs_id_boom_spec[spec] = (
        df_rigzone_status.loc[in_spec_status, 'rig_name_extra'].unique()
    )
    rigs_ihs_id_boom_spec_contracts[spec] = (
        df_rigzone_contracts.loc[in_spec_contracts, 'rig_name_extra'].unique()
    )

# Index both tables by the identifier so a list of drawn ids can be looked up
# directly with .loc (repeated ids yield repeated rows).
df_rigzone_status_index = df_rigzone_status.set_index('rig_name_extra')
df_rigzone_contracts_index = df_rigzone_contracts.set_index('rig_name_extra')

#%%
# Bootstrap the Rigzone data: for each replication k, resample rigs with
# replacement within every `max wd` group, rebuild the state panels from the
# resampled tables, truncate them at the end of 2009 and write the monthly
# state panel to ./models/bootstrap/temp/ for the Stata smoothing step.
df_rigzone_status_by_seed = dict()
df_rigzone_contracts_by_seed = dict()
df_state_by_time_by_seed = dict()
df_agg_by_time_by_seed = dict()

# Last date kept in the bootstrapped monthly panels.
sample_end = pd.to_datetime('2009-12-31')

for k in tqdm(range(n_replications)):
    ids_seed_status = list()
    ids_seed_contracts = list()
    for spec in df_rigzone_contracts['max wd'].unique():
        # Draw as many rigs as the group contains, with replacement.
        ids_seed_status_spec = np.random.choice(
            rigs_ihs_id_boom_spec[spec],
            size=rigs_ihs_id_boom_spec[spec].size,
            replace=True)
        ids_seed_status.extend(list(ids_seed_status_spec))

        # np.random.seed(k)
        ids_seed_contracts_spec = np.random.choice(
            rigs_ihs_id_boom_spec_contracts[spec],
            size=rigs_ihs_id_boom_spec_contracts[spec].size,
            replace=True)
        # Accumulate across groups. Previously only the draw of the last
        # group survived the loop, so the resampled contract table silently
        # dropped every other `max wd` group.
        ids_seed_contracts.extend(list(ids_seed_contracts_spec))

    # Look the drawn ids up in the id-indexed tables; an id drawn several
    # times contributes its rows several times.
    df_rigzone_status_by_seed[k] \
        = df_rigzone_status_index.loc[ids_seed_status].reset_index()
    df_rigzone_contracts_by_seed[k] \
        = df_rigzone_contracts_index.loc[ids_seed_contracts].reset_index()

    # Get the states
    (
        df_state_by_time_by_seed[k],
        df_agg_by_time_by_seed[k]
    ) = utils.construct_states(
        df=df_rigzone_status_by_seed[k],
        df_contracts=df_rigzone_contracts_by_seed[k],
        df_gas_by_time=df_gas_by_time,
        utilization_status=utilization_status,
        nonutilization_status=nonutilization_status
    )

    # Shorten states to <= 2009
    state_month = df_state_by_time_by_seed[k]['month']
    df_state_by_time_by_seed[k]['month'] = state_month[
        state_month['date'] <= sample_end]

    # The aggregate panel is filtered on the second level of its index.
    agg_month = df_agg_by_time_by_seed[k]['month']
    df_agg_by_time_by_seed[k]['month'] = agg_month[
        agg_month.index.get_level_values(1) <= sample_end]

    # Save all
    df_state_by_time_by_seed[k]['month'].to_csv(
        f'./models/bootstrap/temp/df_state_month{k}.csv')


#%% RUN SMOOTHING COMMAND IN STATA ------------------------------------------------------
# Smooth each bootstrapped monthly state panel in Stata.
# For i = 0 .. n_replications - 1 the embedded program imports
# ./models/bootstrap/temp/df_state_month<i>.csv, runs `lpoly` with bandwidth 1
# evaluated at every month_id on `gas` and the three `n_available_*` series
# (results stored in g, n_l, n_m and n_h), and exports the augmented table to
# ./models/bootstrap/processed/df_state_month<i>.csv.
# Notes on the embedded code:
#   * `{{` / `}}` are f-string escapes producing Stata's literal braces, while
#     `{n_replications - 1}` is interpolated by Python.
#   * `i' is Stata's local-macro reference to the loop counter.
#   * `v1` is presumably the name Stata gives the unnamed index column written
#     by to_csv -- TODO confirm against an actual temp file.
stata.run(f"""
    forvalues i=0/{n_replications - 1} {{
        import delimited "./models/bootstrap/temp/df_state_month`i'.csv", clear 
    
        rename v1 month_id
    
        quietly lpoly gas month_id, at(month_id) generate(g) bwidth(1) nograph
        quietly lpoly n_available_low month_id, at(month_id) generate(n_l) bwidth(1) nograph
        quietly lpoly n_available_mid month_id, at(month_id) generate(n_m) bwidth(1) nograph
        quietly lpoly n_available_high month_id, at(month_id) generate(n_h) bwidth(1) nograph
    
        export delimited using "./models/bootstrap/processed/df_state_month`i'.csv", replace
    }}"""
)
#subprocess.run(
 #   f'/Applications/Stata/StataSE.app/Contents/MacOS/StataSE -b '
 #   f'do src/run_scripts/run_smooth_states_bootstrap.do {n_replications - 1} &',
  #  shell=True
#)
#time.sleep(20)

#%% IMPORT THE SMOOTHED DATA ------------------------------------------------------------
# Read back the smoothed state panel of every replication and flag the rows
# dated in calendar year 2006.
df_state_smooth_by_seed = dict()
for k in range(n_replications):
    # The processed file is the output of the Stata smoothing step above.
    smoothed = pd.read_csv(
        f'./models/bootstrap/processed/df_state_month{k}.csv',
        index_col=[0],
        parse_dates=['date'],
    )
    smoothed['2006'] = smoothed['date'].dt.year.isin([2006])
    df_state_smooth_by_seed[k] = smoothed

#%% RESAMPLE IHS DATA -------------------------------------------------------------------
# Number the contracts 0..n-1 within each spec; `price_match_id` is later used
# as a positional index into the per-spec price-match arrays.
for spec in ['low', 'mid', 'high']:
    is_spec = df_ihs_contracts['spec'] == spec
    df_ihs_contracts.loc[is_spec, 'price_match_id'] = range(
        len(df_ihs_contracts.loc[is_spec]))

# Build `contract_id_extra` = "<contract_id>_<max_wd>_<boom>" and collect the
# unique identifiers of every (max_wd, boom) bin for within-bin resampling.
rigs_ihs_id_boom_spec = dict()
df_ihs_contracts['contract_id_extra'] = 'nan'
df_ihs_contracts['contract_id'] = df_ihs_contracts['contract_id'].astype(str)
for spec in df_ihs_contracts['max_wd'].unique():
    same_spec = df_ihs_contracts['max_wd'] == spec
    for boom in [True, False]:
        in_bin = same_spec & (df_ihs_contracts['boom'] == boom)
        df_ihs_contracts.loc[in_bin, 'contract_id_extra'] = (
            df_ihs_contracts.loc[in_bin, 'contract_id'] + f'_{spec}_{boom}'
        )
        rigs_ihs_id_boom_spec[(spec, boom)] = (
            df_ihs_contracts.loc[in_bin, 'contract_id_extra'].unique()
        )

# Index by the identifier so drawn ids can be looked up directly with .loc.
df_ihs_contracts_index = df_ihs_contracts.set_index('contract_id_extra')

# Bootstrap the IHS contracts: for each replication k, resample contracts with
# replacement within every (max_wd, boom) bin, save the resampled table, and
# pick the matching rows of the price-match arrays for each spec.
df_ihs_contracts_by_seed = dict()
price_match_values_by_spec_by_seed = dict()
for k in range(n_replications):
    ids_seed_contracts = list()
    price_match_values_by_spec_by_seed[k] = dict()

    # Get draws within a rig-spec-boom-bust rig type bin...
    for spec in df_ihs_contracts['max_wd'].unique():
        for boom in [True, False]:
            # np.random.seed(k)
            bin_ids = list(rigs_ihs_id_boom_spec[(spec, boom)])
            drawn_ids = np.random.choice(
                bin_ids,
                size=len(bin_ids),
                replace=True)
            ids_seed_contracts.extend(list(drawn_ids))

    #ids_seed_contracts = np.random.choice(
     #           list(df_ihs_contracts.index),
      #          size=len(list(df_ihs_contracts.index)),
       #         replace=True)

    # An id drawn several times contributes its row several times.
    df_ihs_contracts_by_seed[k] \
        = df_ihs_contracts_index.loc[ids_seed_contracts].reset_index()
    df_ihs_contracts_by_seed[k].to_csv(
        f'./models/bootstrap/processed/df_contracts_month{k}.csv')

    # Deal with price_match_values_by_spec
    # Note the key order flips here: the source is [spec][n], the per-seed
    # copy is [n][spec].
    for n in [0, 1, 2, 3]:
        price_match_values_by_spec_by_seed[k][n] = dict()
        for spec in ['low', 'mid', 'high']:
            # Positions (within the spec's original ordering) of the
            # resampled contracts.
            match_rows = df_ihs_contracts_by_seed[k].loc[
                df_ihs_contracts_by_seed[k]['spec'] == spec,
                'price_match_id'].values.astype('int')
            price_match_values_by_spec_by_seed[k][n][spec] \
                = price_match_values_by_spec[spec][n][match_rows]
            with open(
                    f"./models/bootstrap/price_match/"
                    f"price_match_values_{spec}_{n}_month{k}.json",
                    "w"
            ) as outfile:
                # .tolist() converts to plain Python objects; list(ndarray)
                # leaves NumPy scalars / sub-arrays that json cannot
                # serialise when the values are integers or multi-dimensional.
                json.dump(
                    price_match_values_by_spec_by_seed[k][n][spec].tolist(),
                    outfile)


#%% CONSTRUCT MOMENTS -------------------------------------------------------------------
# For every replication, compute the aggregated moments and the price
# coefficients from the resampled data and write each to its own CSV.
agg_moments_by_seed = dict()
coefs_by_seed = dict()
for k in range(n_replications):
    contracts_k = df_ihs_contracts_by_seed[k]

    # Aggregated moments from the resampled contracts and smoothed states.
    agg_moments_by_seed[k] = utils_run_scripts.get_aggregated_moments_from_data(
        df_contracts=contracts_k,
        df_state=df_state_smooth_by_seed[k],
    )
    moments_series = pd.Series(agg_moments_by_seed[k])
    moments_series.to_csv(
        f'./models/bootstrap/processed/moments_empirical{k}.csv')

    # Price coefficients use the n = 3 price-match values of this replication.
    coefs_data = utils_run_scripts.get_price_coefficients(
        contracts_k,
        price_match_values_by_spec_by_seed[k][3],
        delta,
    )
    coefs_series = pd.Series(coefs_data)
    coefs_series.to_csv(
        f'./models/bootstrap/processed/coefs_data_month{k}.csv')
